import pandas as pd
import os
os.chdir("D:\Dataset_MidTerm")
ec=pd.read_excel("D:\Dataset_MidTerm\Ecommerce_data.xlsx")
ec.head()
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Postal Code | City | ... | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | Shipping Cost | Order Priority | Unnamed: 24 | values | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 40098 | CA-2014-AB10015140-41954 | 2014-11-11 | 2014-11-13 | First Class | AB-100151402 | Aaron Bergman | Consumer | 73120.0 | Oklahoma City | ... | Phones | Samsung Convoy 3 | 221.980 | 2 | 0.0 | 62.1544 | 40.77 | High | NaN | 110.990 |
| 1 | 26341 | IN-2014-JR162107-41675 | 2014-02-05 | 2014-02-07 | Second Class | JR-162107 | Justin Ritter | Corporate | NaN | Wollongong | ... | Chairs | Novimex Executive Leather Armchair, Black | 3709.395 | 9 | 0.1 | -288.7650 | 923.63 | Critical | NaN | 412.155 |
| 2 | 25330 | IN-2014-CR127307-41929 | 2014-10-17 | 2014-10-18 | First Class | CR-127307 | Craig Reiter | Consumer | NaN | Brisbane | ... | Phones | Nokia Smart Phone, with Caller ID | 5175.171 | 9 | 0.1 | 919.9710 | 915.49 | Medium | NaN | 575.019 |
| 3 | 13524 | ES-2014-KM1637548-41667 | 2014-01-28 | 2014-01-30 | First Class | KM-1637548 | Katherine Murray | Home Office | NaN | Berlin | ... | Phones | Motorola Smart Phone, Cordless | 2892.510 | 5 | 0.1 | -96.5400 | 910.16 | Medium | NaN | 578.502 |
| 4 | 47221 | SG-2014-RH9495111-41948 | 2014-11-05 | 2014-11-06 | Same Day | RH-9495111 | Rick Hansen | Consumer | NaN | Dakar | ... | Copiers | Sharp Wireless Fax, High-Speed | 2832.960 | 8 | 0.0 | 311.5200 | 903.04 | Critical | NaN | 354.120 |
5 rows × 26 columns
ec.tail()
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Postal Code | City | ... | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | Shipping Cost | Order Priority | Unnamed: 24 | values | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 51285 | 29002 | IN-2015-KE1642066-42174 | 2015-06-19 | 2015-06-19 | Same Day | KE-1642066 | Katrina Edelman | Corporate | NaN | Kure | ... | Fasteners | Advantus Thumb Tacks, 12 Pack | 65.10 | 5 | 0.0 | 4.5000 | 1.010 | Medium | NaN | 13.020 |
| 51286 | 34337 | US-2014-ZD21925140-41765 | 2014-05-06 | 2014-05-10 | Standard Class | ZD-219251408 | Zuschuss Donatelli | Consumer | 37421.0 | Chattanooga | ... | Furnishings | Eldon Image Series Desk Accessories, Burgundy | 16.72 | 5 | 0.2 | 3.3440 | 1.930 | High | NaN | 3.344 |
| 51287 | 31315 | CA-2012-ZD21925140-41147 | 2012-08-26 | 2012-08-31 | Second Class | ZD-219251404 | Zuschuss Donatelli | Consumer | 94109.0 | San Francisco | ... | Art | Newell 341 | 8.56 | 2 | 0.0 | 2.4824 | 1.580 | High | NaN | 4.280 |
| 51288 | 9596 | MX-2013-RB1979518-41322 | 2013-02-17 | 2013-02-21 | Standard Class | RB-1979518 | Ross Baird | Home Office | NaN | Valinhos | ... | Binders | Acco Index Tab, Economy | 13.44 | 2 | 0.0 | 2.4000 | 1.003 | Medium | NaN | 6.720 |
| 51289 | 6147 | MX-2013-MC1810093-41416 | 2013-05-22 | 2013-05-26 | Second Class | MC-1810093 | Mick Crebagga | Consumer | NaN | Tipitapa | ... | Paper | Eaton Computer Printout Paper, 8.5 x 11 | 61.38 | 3 | 0.0 | 1.8000 | 1.002 | High | NaN | 20.460 |
5 rows × 26 columns
ec.shape
(51290, 26)
ec.columns
Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
'Customer ID', 'Customer Name', 'Segment', 'Postal Code', 'City',
'State', 'Country', 'Region', 'Market', 'Product ID', 'Category',
'Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount',
'Profit', 'Shipping Cost', 'Order Priority', 'Unnamed: 24', 'values'],
dtype='object')
ec1=pd.read_csv("D:\Dataset_MidTerm\Return.csv")
ec1=pd.read_csv("D:\Dataset_MidTerm\Return.csv")
ec1
| Returned | Order ID | Region | |
|---|---|---|---|
| 0 | Yes | CA-2012-SA20830140-41210 | Central US |
| 1 | Yes | IN-2012-PB19210127-41259 | Eastern Asia |
| 2 | Yes | CA-2012-SC20095140-41174 | Central US |
| 3 | Yes | IN-2015-JH158207-42140 | Oceania |
| 4 | Yes | IN-2014-LC168857-41747 | Oceania |
| ... | ... | ... | ... |
| 1074 | Yes | IN-2014-DA1345058-41769 | Southern Asia |
| 1075 | Yes | US-2013-HG14845140-41530 | Eastern US |
| 1076 | Yes | US-2013-SJ2021582-41543 | Central America |
| 1077 | Yes | CA-2015-EB13870140-42269 | Eastern US |
| 1078 | Yes | EG-2014-PM894038-41846 | North Africa |
1079 rows × 3 columns
ec1.columns
Index(['Returned', 'Order ID', 'Region'], dtype='object')
ec1.groupby(['Order ID','Region'])['Returned'].sum().sort_values(ascending=True)
Order ID Region
AE-2014-MY7380138-42004 Western Asia Yes
IT-2015-ME18010124-42235 Northern Europe Yes
IT-2015-RB19570120-42181 Southern Europe Yes
IV-2014-AM70529-41678 Western Africa Yes
IV-2014-LC688529-41898 Western Africa Yes
...
ES-2015-CW11905139-42288 Northern Europe Yes
ES-2015-DK1309045-42158 Western Europe Yes
ES-2015-DK13375139-42082 Northern Europe Yes
ES-2015-BF11170139-42174 Northern Europe Yes
ZA-2015-SW10350146-42061 Eastern Africa Yes
Name: Returned, Length: 1079, dtype: object
ec2=pd.read_excel("D:\Dataset_MidTerm\Ecommerce_data1.xlsx")
ec2
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Postal Code | City | ... | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | Shipping Cost | Order Priority | Unnamed: 24 | values | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 40098 | CA-2014-AB10015140-41954 | 2014-11-11 | 2014-11-13 | First Class | AB-100151402 | Aaron Bergman | Consumer | 73120.0 | Oklahoma City | ... | Phones | Samsung Convoy 3 | 221.980 | 2 | 0.0 | 62.1544 | 40.770 | High | NaN | 110.990 |
| 1 | 26341 | IN-2014-JR162107-41675 | 2014-02-05 | 2014-02-07 | Second Class | JR-162107 | Justin Ritter | Corporate | NaN | Wollongong | ... | Chairs | Novimex Executive Leather Armchair, Black | 3709.395 | 9 | 0.1 | -288.7650 | 923.630 | Critical | NaN | 412.155 |
| 2 | 25330 | IN-2014-CR127307-41929 | 2014-10-17 | 2014-10-18 | First Class | CR-127307 | Craig Reiter | Consumer | NaN | Brisbane | ... | Phones | Nokia Smart Phone, with Caller ID | 5175.171 | 9 | 0.1 | 919.9710 | 915.490 | Medium | NaN | 575.019 |
| 3 | 13524 | ES-2014-KM1637548-41667 | 2014-01-28 | 2014-01-30 | First Class | KM-1637548 | Katherine Murray | Home Office | NaN | Berlin | ... | Phones | Motorola Smart Phone, Cordless | 2892.510 | 5 | 0.1 | -96.5400 | 910.160 | Medium | NaN | 578.502 |
| 4 | 47221 | SG-2014-RH9495111-41948 | 2014-11-05 | 2014-11-06 | Same Day | RH-9495111 | Rick Hansen | Consumer | NaN | Dakar | ... | Copiers | Sharp Wireless Fax, High-Speed | 2832.960 | 8 | 0.0 | 311.5200 | 903.040 | Critical | NaN | 354.120 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51285 | 29002 | IN-2015-KE1642066-42174 | 2015-06-19 | 2015-06-19 | Same Day | KE-1642066 | Katrina Edelman | Corporate | NaN | Kure | ... | Fasteners | Advantus Thumb Tacks, 12 Pack | 65.100 | 5 | 0.0 | 4.5000 | 1.010 | Medium | NaN | 13.020 |
| 51286 | 34337 | US-2014-ZD21925140-41765 | 2014-05-06 | 2014-05-10 | Standard Class | ZD-219251408 | Zuschuss Donatelli | Consumer | 37421.0 | Chattanooga | ... | Furnishings | Eldon Image Series Desk Accessories, Burgundy | 16.720 | 5 | 0.2 | 3.3440 | 1.930 | High | NaN | 3.344 |
| 51287 | 31315 | CA-2012-ZD21925140-41147 | 2012-08-26 | 2012-08-31 | Second Class | ZD-219251404 | Zuschuss Donatelli | Consumer | 94109.0 | San Francisco | ... | Art | Newell 341 | 8.560 | 2 | 0.0 | 2.4824 | 1.580 | High | NaN | 4.280 |
| 51288 | 9596 | MX-2013-RB1979518-41322 | 2013-02-17 | 2013-02-21 | Standard Class | RB-1979518 | Ross Baird | Home Office | NaN | Valinhos | ... | Binders | Acco Index Tab, Economy | 13.440 | 2 | 0.0 | 2.4000 | 1.003 | Medium | NaN | 6.720 |
| 51289 | 6147 | MX-2013-MC1810093-41416 | 2013-05-22 | 2013-05-26 | Second Class | MC-1810093 | Mick Crebagga | Consumer | NaN | Tipitapa | ... | Paper | Eaton Computer Printout Paper, 8.5 x 11 | 61.380 | 3 | 0.0 | 1.8000 | 1.002 | High | NaN | 20.460 |
51290 rows × 26 columns
ec3 = pd.ExcelFile("D:\Dataset_MidTerm\Ecommerce_data.xlsx")
ec3
<pandas.io.excel._base.ExcelFile at 0x131c9ebbf90>
print(ec3.sheet_names)
['Orders', 'Returns', 'People']
o_df = pd.read_excel(ec3, 'Orders')
r_df = pd.read_excel(ec3, 'Returns')
o_df.head(), r_df.head()
( Row ID Order ID Order Date Ship Date Ship Mode \
0 40098 CA-2014-AB10015140-41954 2014-11-11 2014-11-13 First Class
1 26341 IN-2014-JR162107-41675 2014-02-05 2014-02-07 Second Class
2 25330 IN-2014-CR127307-41929 2014-10-17 2014-10-18 First Class
3 13524 ES-2014-KM1637548-41667 2014-01-28 2014-01-30 First Class
4 47221 SG-2014-RH9495111-41948 2014-11-05 2014-11-06 Same Day
Customer ID Customer Name Segment Postal Code City \
0 AB-100151402 Aaron Bergman Consumer 73120.0 Oklahoma City
1 JR-162107 Justin Ritter Corporate NaN Wollongong
2 CR-127307 Craig Reiter Consumer NaN Brisbane
3 KM-1637548 Katherine Murray Home Office NaN Berlin
4 RH-9495111 Rick Hansen Consumer NaN Dakar
... Sub-Category Product Name Sales \
0 ... Phones Samsung Convoy 3 221.980
1 ... Chairs Novimex Executive Leather Armchair, Black 3709.395
2 ... Phones Nokia Smart Phone, with Caller ID 5175.171
3 ... Phones Motorola Smart Phone, Cordless 2892.510
4 ... Copiers Sharp Wireless Fax, High-Speed 2832.960
Quantity Discount Profit Shipping Cost Order Priority Unnamed: 24 \
0 2 0.0 62.1544 40.77 High NaN
1 9 0.1 -288.7650 923.63 Critical NaN
2 9 0.1 919.9710 915.49 Medium NaN
3 5 0.1 -96.5400 910.16 Medium NaN
4 8 0.0 311.5200 903.04 Critical NaN
values
0 110.990
1 412.155
2 575.019
3 578.502
4 354.120
[5 rows x 26 columns],
Returned Order ID Region
0 Yes CA-2012-SA20830140-41210 Central US
1 Yes IN-2012-PB19210127-41259 Eastern Asia
2 Yes CA-2012-SC20095140-41174 Central US
3 Yes IN-2015-JH158207-42140 Oceania
4 Yes IN-2014-LC168857-41747 Oceania)
concat_df = pd.merge(o_df, r_df, on='Order ID')# Order id is a primary key
concat_df
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Postal Code | City | ... | Sales | Quantity | Discount | Profit | Shipping Cost | Order Priority | Unnamed: 24 | values | Returned | Region_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 30191 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | NaN | Taipei | ... | 1715.160 | 2 | 0.0 | 720.3600 | 725.570 | Critical | NaN | 857.580 | Yes | Eastern Asia |
| 1 | 30190 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | NaN | Taipei | ... | 2197.500 | 5 | 0.0 | 153.7500 | 627.270 | Critical | NaN | 439.500 | Yes | Eastern Asia |
| 2 | 30187 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | NaN | Taipei | ... | 1356.030 | 3 | 0.0 | 311.8500 | 458.970 | Critical | NaN | 452.010 | Yes | Eastern Asia |
| 3 | 30193 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | NaN | Taipei | ... | 882.150 | 5 | 0.0 | 114.6000 | 203.920 | Critical | NaN | 176.430 | Yes | Eastern Asia |
| 4 | 30189 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | NaN | Taipei | ... | 148.320 | 3 | 0.0 | 68.2200 | 25.480 | Critical | NaN | 49.440 | Yes | Eastern Asia |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2215 | 35040 | CA-2014-WB21850140-41654 | 2014-01-15 | 2014-01-21 | Standard Class | WB-218501406 | William Brown | Consumer | 10011.0 | New York City | ... | 49.536 | 3 | 0.2 | 17.3376 | 2.670 | Medium | NaN | 16.512 | Yes | Eastern US |
| 2216 | 35038 | CA-2014-WB21850140-41654 | 2014-01-15 | 2014-01-21 | Standard Class | WB-218501406 | William Brown | Consumer | 10011.0 | New York City | ... | 16.520 | 4 | 0.0 | 7.5992 | 1.690 | Medium | NaN | 4.130 | Yes | Eastern US |
| 2217 | 25 | US-2013-SJ2021582-41543 | 2013-09-26 | 2013-09-29 | First Class | SJ-2021582 | Sarah Jordon | Consumer | NaN | Hermosillo | ... | 181.116 | 9 | 0.4 | -75.5640 | 1.085 | Medium | NaN | 20.124 | Yes | Central America |
| 2218 | 36974 | CA-2012-ZC21910140-41271 | 2012-12-28 | 2013-01-04 | Standard Class | ZC-219101402 | Zuschuss Carroll | Consumer | 60610.0 | Chicago | ... | 38.976 | 3 | 0.6 | -50.6688 | 5.290 | Low | NaN | 12.992 | Yes | Central US |
| 2219 | 41315 | EG-2014-PM894038-41846 | 2014-07-26 | 2014-07-30 | Standard Class | PM-894038 | Paul MacIntyre | Consumer | NaN | Alexandria | ... | 25.830 | 1 | 0.0 | 9.0300 | 1.050 | Medium | NaN | 25.830 | Yes | North Africa |
2220 rows × 28 columns
concat_df.isnull().any().sum()
2
ec3 = concat_df.dropna(axis=1)
ec3.shape
(2220, 26)
ec3.columns
Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
'Customer ID', 'Customer Name', 'Segment', 'City', 'State', 'Country',
'Region_x', 'Market', 'Product ID', 'Category', 'Sub-Category',
'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit',
'Shipping Cost', 'Order Priority', 'values', 'Returned', 'Region_y'],
dtype='object')
list(ec3)
['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'City', 'State', 'Country', 'Region_x', 'Market', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit', 'Shipping Cost', 'Order Priority', 'values', 'Returned', 'Region_y']
total_returned_value = ec3['Sales'].sum()
print(' Total Returned Value is',total_returned_value)
Total Returned Value is 525932.22736
total_returned_value = ec3['values'].sum()
print('Total Returned Value is',total_returned_value)
Total Returned Value is 157010.21778
returned_value_by_region = ec3.groupby(['Region_y'])[['Sales','values']].sum()
returned_value_by_region
| Sales | values | |
|---|---|---|
| Region_y | ||
| Caribbean | 9679.32828 | 2969.33504 |
| Central Africa | 4038.48000 | 1756.05000 |
| Central America | 44334.79916 | 13078.52248 |
| Central Asia | 359.03100 | 146.19600 |
| Central US | 23306.14500 | 5002.83300 |
| Eastern Africa | 2026.44000 | 1214.04000 |
| Eastern Asia | 47798.04300 | 13874.01300 |
| Eastern Canada | 2639.91000 | 654.06000 |
| Eastern Europe | 9501.21000 | 4476.66000 |
| Eastern US | 23477.32600 | 6992.91000 |
| North Africa | 10193.37000 | 5644.29000 |
| Northern Europe | 17768.64000 | 5526.58200 |
| Oceania | 46943.33100 | 12685.42200 |
| South America | 29845.11212 | 9224.02916 |
| Southeastern Asia | 36967.09830 | 11068.52910 |
| Southern Africa | 7321.86000 | 3246.45000 |
| Southern Asia | 25159.36500 | 8395.40700 |
| Southern Europe | 34226.34300 | 7325.53500 |
| Southern US | 17024.59700 | 4209.27650 |
| Western Africa | 3928.15800 | 2623.65600 |
| Western Asia | 11737.63800 | 5182.62600 |
| Western Canada | 676.50000 | 638.37000 |
| Western Europe | 72669.48450 | 17705.63850 |
| Western US | 44310.01800 | 13369.78700 |
import random
import seaborn as sns
sns.lmplot(x='Quantity',y='Sales',hue='values',data=ec3)
C:\Users\devar\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
<seaborn.axisgrid.FacetGrid at 0x131c6adc810>
ec3['Unit Price'] = ec3['Sales'] / ec3['Quantity']
ec3
C:\Users\devar\AppData\Local\Temp\ipykernel_15912\3027465675.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy ec3['Unit Price'] = ec3['Sales'] / ec3['Quantity']
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | City | State | ... | Sales | Quantity | Discount | Profit | Shipping Cost | Order Priority | values | Returned | Region_y | Unit Price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 30191 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | Taipei | Taipei City | ... | 1715.160 | 2 | 0.0 | 720.3600 | 725.570 | Critical | 857.580 | Yes | Eastern Asia | 857.580 |
| 1 | 30190 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | Taipei | Taipei City | ... | 2197.500 | 5 | 0.0 | 153.7500 | 627.270 | Critical | 439.500 | Yes | Eastern Asia | 439.500 |
| 2 | 30187 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | Taipei | Taipei City | ... | 1356.030 | 3 | 0.0 | 311.8500 | 458.970 | Critical | 452.010 | Yes | Eastern Asia | 452.010 |
| 3 | 30193 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | Taipei | Taipei City | ... | 882.150 | 5 | 0.0 | 114.6000 | 203.920 | Critical | 176.430 | Yes | Eastern Asia | 176.430 |
| 4 | 30189 | IN-2012-PB19210127-41259 | 2012-12-16 | 2012-12-19 | First Class | PB-19210127 | Phillip Breyer | Corporate | Taipei | Taipei City | ... | 148.320 | 3 | 0.0 | 68.2200 | 25.480 | Critical | 49.440 | Yes | Eastern Asia | 49.440 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2215 | 35040 | CA-2014-WB21850140-41654 | 2014-01-15 | 2014-01-21 | Standard Class | WB-218501406 | William Brown | Consumer | New York City | New York | ... | 49.536 | 3 | 0.2 | 17.3376 | 2.670 | Medium | 16.512 | Yes | Eastern US | 16.512 |
| 2216 | 35038 | CA-2014-WB21850140-41654 | 2014-01-15 | 2014-01-21 | Standard Class | WB-218501406 | William Brown | Consumer | New York City | New York | ... | 16.520 | 4 | 0.0 | 7.5992 | 1.690 | Medium | 4.130 | Yes | Eastern US | 4.130 |
| 2217 | 25 | US-2013-SJ2021582-41543 | 2013-09-26 | 2013-09-29 | First Class | SJ-2021582 | Sarah Jordon | Consumer | Hermosillo | Sonora | ... | 181.116 | 9 | 0.4 | -75.5640 | 1.085 | Medium | 20.124 | Yes | Central America | 20.124 |
| 2218 | 36974 | CA-2012-ZC21910140-41271 | 2012-12-28 | 2013-01-04 | Standard Class | ZC-219101402 | Zuschuss Carroll | Consumer | Chicago | Illinois | ... | 38.976 | 3 | 0.6 | -50.6688 | 5.290 | Low | 12.992 | Yes | Central US | 12.992 |
| 2219 | 41315 | EG-2014-PM894038-41846 | 2014-07-26 | 2014-07-30 | Standard Class | PM-894038 | Paul MacIntyre | Consumer | Alexandria | Al Iskandariyah | ... | 25.830 | 1 | 0.0 | 9.0300 | 1.050 | Medium | 25.830 | Yes | North Africa | 25.830 |
2220 rows × 27 columns
returned_value = concat_df.groupby(['Category', 'Sub-Category'])[['Product ID']].sum()
returned_value
| Product ID | ||
|---|---|---|
| Category | Sub-Category | |
| Furniture | Bookcases | FUR-BO-5762FUR-BO-3624FUR-BO-4848FUR-BO-5788FU... |
| Chairs | FUR-CH-5454FUR-CH-5378FUR-CH-5378FUR-CH-4397FU... | |
| Furnishings | FUR-FU-4086FUR-FU-3928FUR-FU-3028FUR-FU-4086FU... | |
| Tables | FUR-TA-5060FUR-TA-5065FUR-TA-3766FUR-TA-3780FU... | |
| Office Supplies | Appliances | OFF-AP-4961OFF-AP-4958OFF-AP-3870OFF-AP-3575OF... |
| Art | OFF-AR-5920OFF-AR-6124OFF-AR-5930OFF-AR-3494OF... | |
| Binders | OFF-BI-2897OFF-BI-3723OFF-BI-4809OFF-BI-4807OF... | |
| Envelopes | OFF-EN-3097OFF-EN-4914OFF-EN-5040OFF-EN-5023OF... | |
| Fasteners | OFF-FA-2943OFF-FA-6207OFF-FA-6204OFF-FA-3064OF... | |
| Labels | OFF-LA-4552OFF-LA-6064OFF-LA-3312OFF-LA-4541OF... | |
| Paper | OFF-PA-4004OFF-PA-6614OFF-PA-4470OFF-PA-6577OF... | |
| Storage | OFF-ST-4516OFF-ST-6263OFF-ST-4057OFF-ST-6031OF... | |
| Supplies | OFF-SU-6182OFF-SU-4319OFF-SU-4120OFF-SU-4317OF... | |
| Technology | Accessories | TEC-AC-4164TEC-AC-4185TEC-AC-4178TEC-AC-5121TE... |
| Copiers | TEC-CO-6012TEC-CO-5991TEC-CO-4593TEC-CO-6001TE... | |
| Machines | TEC-MA-5494TEC-MA-6150TEC-MA-6138TEC-MA-5546TE... | |
| Phones | TEC-PH-3147TEC-PH-5815TEC-PH-3148TEC-PH-3805TE... |
ec5 = ec3[ec3['values'] > 100]
total_returned_products = ec5['Quantity'].sum()
print('The Total Returned Products is', total_returned_products)
The Total Returned Products is 1607
ec3['delivery_days'] = (ec3['Ship Date'] - ec3['Order Date']).dt.days
ec
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Postal Code | City | ... | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | Shipping Cost | Order Priority | Unnamed: 24 | values | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 40098 | CA-2014-AB10015140-41954 | 2014-11-11 | 2014-11-13 | First Class | AB-100151402 | Aaron Bergman | Consumer | 73120.0 | Oklahoma City | ... | Phones | Samsung Convoy 3 | 221.980 | 2 | 0.0 | 62.1544 | 40.770 | High | NaN | 110.990 |
| 1 | 26341 | IN-2014-JR162107-41675 | 2014-02-05 | 2014-02-07 | Second Class | JR-162107 | Justin Ritter | Corporate | NaN | Wollongong | ... | Chairs | Novimex Executive Leather Armchair, Black | 3709.395 | 9 | 0.1 | -288.7650 | 923.630 | Critical | NaN | 412.155 |
| 2 | 25330 | IN-2014-CR127307-41929 | 2014-10-17 | 2014-10-18 | First Class | CR-127307 | Craig Reiter | Consumer | NaN | Brisbane | ... | Phones | Nokia Smart Phone, with Caller ID | 5175.171 | 9 | 0.1 | 919.9710 | 915.490 | Medium | NaN | 575.019 |
| 3 | 13524 | ES-2014-KM1637548-41667 | 2014-01-28 | 2014-01-30 | First Class | KM-1637548 | Katherine Murray | Home Office | NaN | Berlin | ... | Phones | Motorola Smart Phone, Cordless | 2892.510 | 5 | 0.1 | -96.5400 | 910.160 | Medium | NaN | 578.502 |
| 4 | 47221 | SG-2014-RH9495111-41948 | 2014-11-05 | 2014-11-06 | Same Day | RH-9495111 | Rick Hansen | Consumer | NaN | Dakar | ... | Copiers | Sharp Wireless Fax, High-Speed | 2832.960 | 8 | 0.0 | 311.5200 | 903.040 | Critical | NaN | 354.120 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51285 | 29002 | IN-2015-KE1642066-42174 | 2015-06-19 | 2015-06-19 | Same Day | KE-1642066 | Katrina Edelman | Corporate | NaN | Kure | ... | Fasteners | Advantus Thumb Tacks, 12 Pack | 65.100 | 5 | 0.0 | 4.5000 | 1.010 | Medium | NaN | 13.020 |
| 51286 | 34337 | US-2014-ZD21925140-41765 | 2014-05-06 | 2014-05-10 | Standard Class | ZD-219251408 | Zuschuss Donatelli | Consumer | 37421.0 | Chattanooga | ... | Furnishings | Eldon Image Series Desk Accessories, Burgundy | 16.720 | 5 | 0.2 | 3.3440 | 1.930 | High | NaN | 3.344 |
| 51287 | 31315 | CA-2012-ZD21925140-41147 | 2012-08-26 | 2012-08-31 | Second Class | ZD-219251404 | Zuschuss Donatelli | Consumer | 94109.0 | San Francisco | ... | Art | Newell 341 | 8.560 | 2 | 0.0 | 2.4824 | 1.580 | High | NaN | 4.280 |
| 51288 | 9596 | MX-2013-RB1979518-41322 | 2013-02-17 | 2013-02-21 | Standard Class | RB-1979518 | Ross Baird | Home Office | NaN | Valinhos | ... | Binders | Acco Index Tab, Economy | 13.440 | 2 | 0.0 | 2.4000 | 1.003 | Medium | NaN | 6.720 |
| 51289 | 6147 | MX-2013-MC1810093-41416 | 2013-05-22 | 2013-05-26 | Second Class | MC-1810093 | Mick Crebagga | Consumer | NaN | Tipitapa | ... | Paper | Eaton Computer Printout Paper, 8.5 x 11 | 61.380 | 3 | 0.0 | 1.8000 | 1.002 | High | NaN | 20.460 |
51290 rows × 26 columns
same_day = ec3[ec3['delivery_days'] == 0].shape[0]
within_7_days = ec3[(ec3['delivery_days'] > 0) & (ec3['delivery_days'] <= 7)].shape[0]
within_15_days = ec3[(ec3['delivery_days'] > 7) & (ec3['delivery_days'] <= 15)].shape[0]
more_than_30_days = ec3[ec3['delivery_days'] > 30].shape[0]
print(f"Same Day: {same_day}")
print(f"Within 7 Days: {within_7_days}")
print(f"Within 15 Days: {within_15_days}")
print(f"More than 30 Days: {more_than_30_days}")
Same Day: 115 Within 7 Days: 2105 Within 15 Days: 0 More than 30 Days: 0
from scipy.stats import pearsonr
pearsonr(ec3['values'],ec3['Quantity'])
PearsonRResult(statistic=-0.028330344451430955, pvalue=0.18208808203216745)
order_priority = ec3.groupby('Order Priority')['delivery_days'].describe()
print(order_priority)
count mean std min 25% 50% 75% max Order Priority Critical 188.0 1.750000 1.135829 0.0 1.0 2.0 3.0 3.0 High 643.0 3.205288 1.453807 0.0 2.0 4.0 4.0 5.0 Low 125.0 6.552000 0.499290 6.0 6.0 7.0 7.0 7.0 Medium 1264.0 4.528481 1.393053 0.0 4.0 5.0 5.0 7.0
from scipy import stats
anova_result = stats.f_oneway(
ec3[ec3['Order Priority'] == 'Low']['delivery_days'],
ec3[ec3['Order Priority'] == 'Medium']['delivery_days'],
ec3[ec3['Order Priority'] == 'High']['delivery_days'],
ec3[ec3['Order Priority'] == 'Critical']['delivery_days'])
print(f"ANOVA result: {anova_result}")
ANOVA result: F_onewayResult(statistic=456.90010105295534, pvalue=4.523164187330846e-231)